% scribe: Lian Yu
% lastupdate: Oct. 12, 2005
% lecture: 11
% references: Durrett, section 2.2
% title: Background on Convergence in Distribution
% keywords: convergence in distribution, weak convergence, weak $\star$ convergence, weak convergence theorem, central limit theorem
% end
\documentclass[12pt,letterpaper]{article}
\include{macros}
\begin{document}
\lecture{11}{Background on Convergence in Distribution}{Lian Yu}{liany@ieor.berkeley.edu}

This topic is covered in section 2.2 of \cite{durrett}.

\section{Definition of convergence in distribution}
% keywords: convergence in distribution, weak convergence, weak $\star$ convergence
% end

\begin{definition}
Let $S$ be a metric space and let $\mathcal{S}$ be the Borel $\sigma$-field on $S$. Let $P_1,P_2,\ldots$ be a sequence of probability measures on $(S,\mathcal{S})$. Say that $P_n$ \emph{converges in distribution} to a probability measure $P$ on $(S,\mathcal{S})$, written $P_n\stackrel{d}{\longrightarrow}P$, if
\begin{align*}
\int fdP_n\rightarrow \int fdP
\end{align*}
for every bounded continuous function $f:S\rightarrow \R$.
\end{definition}

This type of convergence is also often called \emph{weak convergence} or \emph{weak-$\star$ convergence}. Note that weak limits are unique. This comes down to the fact that the collection of bounded continuous functions is a \emph{determining class}. That is,
\begin{align*}
\int fdP=\int fdQ \text{ for all bounded continuous $f$}
\end{align*}
implies $P(A)=Q(A)$ for all $A\in\mathcal{S}$.
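A standard example shows what convergence in distribution does and does not give. Take $P_n=\delta_{1/n}$, the point mass at $1/n$, and $P=\delta_0$. For every bounded continuous $f$,
\begin{align*}
\int fdP_n=f(1/n)\rightarrow f(0)=\int fdP,
\end{align*}
so $P_n\stackrel{d}{\longrightarrow}P$. On the other hand, $P_n(-\infty,0]=0$ for all $n$ while $P(-\infty,0]=1$: the distribution functions fail to converge at $x=0$, which is a discontinuity point of $x\mapsto P(-\infty,x]$. This is exactly why condition 3 of the theorem in the next section is restricted to continuity points of the limit.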
\section{Weak convergence of real random variables}
%keywords: weak convergence theorem
%end

\begin{theorem}
Let $P_n$, $n=1,2,\ldots$, and $P$ be probability measures on $\R$. The following are equivalent:
\begin{enumerate}
\item $\int fdP_n\rightarrow\int fdP$ for all bounded continuous $f$;
\item the same for all $C^\infty$ functions $f$ with all derivatives bounded;
\item $P_n(-\infty,x]\rightarrow P(-\infty,x]$ for all $x$ at which $x\mapsto P(-\infty,x]$ is continuous;
\item condition 1 for all bounded $f$ that are continuous except on a set of $P$-measure $0$; and
\item $\limsup_{n\rightarrow\infty}P_n(C)\leq P(C)$ for all closed $C$, or equivalently $\liminf_{n\rightarrow\infty}P_n(O)\geq P(O)$ for all open $O$.
\end{enumerate}
\end{theorem}

\begin{proof}
\emph{1 $\Rightarrow$ 3}: Define $f_{u,v}$ by
\begin{align*}
f_{u,v}(x)=
\begin{cases}
1 &\mbox{ if $x\leq u$}\\
0 &\mbox{ if $x\geq v$}\\
\mbox{linear}& \mbox{ if $u\leq x\leq v$.}
\end{cases}
\end{align*}
For $\epsilon>0$,
\begin{align*}
f_{x-\epsilon,x}\leq \1(-\infty,x]\leq f_{x,x+\epsilon}.
\end{align*}
Write $Pf$ for $\int fdP$. So if $P_n\stackrel{d}{\longrightarrow}P$, then
\begin{align*}
P_nf_{x-\epsilon,x}\leq P_n(-\infty,x]\leq P_nf_{x,x+\epsilon}.
\end{align*}
Letting $n\rightarrow\infty$,
\begin{align*}
Pf_{x-\epsilon,x}\leq \liminf_{n\rightarrow\infty}{P_n(-\infty,x]}\leq \limsup_{n\rightarrow\infty}{P_n(-\infty,x]}\leq Pf_{x,x+\epsilon}
\end{align*}
and
\begin{align*}
P(-\infty,x-\epsilon]\leq Pf_{x-\epsilon,x}\leq P(-\infty,x]\leq Pf_{x,x+\epsilon}\leq P(-\infty,x+\epsilon].
\end{align*}
Now assume $y\mapsto P(-\infty,y]$ is continuous at $y=x$. By taking $\epsilon$ sufficiently small, we can then make $Pf_{x-\epsilon,x}$ and $Pf_{x,x+\epsilon}$ as close as we like to $P(-\infty,x]$, and we conclude
\begin{align*}
\liminf_{n\rightarrow\infty}{P_n(-\infty,x]}=\limsup_{n\rightarrow\infty}P_n(-\infty,x]=P(-\infty,x].
\end{align*}

\emph{3 $\Rightarrow$ 1:} To show this we use another approximation. Condition 3 gives
\begin{equation}
P_nf\rightarrow Pf \mbox{ for $f=\1(-\infty,x]$ where $x$ is a continuity point of $x\mapsto P(-\infty,x]$.}
\label{thm21point3}
\end{equation}
First observe that the set of continuity points of $P$ is dense in $\R$, since $x\mapsto P(-\infty,x]$ has only countably many jumps. Second, note that we can extend (\ref{thm21point3}) from indicators to finite linear combinations of such indicators, i.e.\ to step functions whose jumps occur at continuity points of $P$. Now let $f$ be continuous and bounded in magnitude by $M$. Fix a target $\epsilon>0$ and choose $B$ so that $B$ and $-B$ are both continuity points of $x\mapsto P(-\infty,x]$ and $P((-B,B]^c)<\epsilon$. By (\ref{thm21point3}) there exists $n(\epsilon)$ such that $P_n((-B,B]^c)<2\epsilon$ for all $n\geq n(\epsilon)$. Next, choose a step function $s$, with jumps at continuity points of $P$, so that
\begin{align*}
|s(x)-f(x)|\leq\epsilon
\end{align*}
for all $x\in(-B,B]$ and $s=0$ outside $(-B,B]$ (this can be done by uniform continuity of $f$ on $[-B,B]$ and density of the continuity points). Then
\begin{equation}
|P_nf-P_ns|\leq 2\epsilon M+\epsilon
\label{thm21starstar}
\end{equation}
for $n\geq n(\epsilon)$: on $(-B,B]$ the integrands differ by at most $\epsilon$, and outside they differ by at most $M$ on a set of $P_n$-measure less than $2\epsilon$. (Note that our $s$ depends on $\epsilon$.) Choose $n$ even larger so that $|P_ns-Ps|\leq\epsilon$, which is possible since $s$ is covered by the step-function extension of (\ref{thm21point3}). Thus, by the triangle inequality,
\begin{align*}
|P_nf-Ps|\leq 2\epsilon M+2\epsilon.
\end{align*}
The same argument that gave (\ref{thm21starstar}) applies with $P$ in place of $P_n$, so $|Pf-Ps|\leq 2\epsilon M+\epsilon$. Putting it all together,
\begin{align*}
|P_nf-Pf|\leq 4\epsilon M+4\epsilon
\end{align*}
for all sufficiently large $n$.
\end{proof}

\section{Central Limit Theorem}
%keywords: central limit theorem
%end

\begin{theorem}
Let $X_1,X_2,\ldots$ be i.i.d.\ with $\E(X_n)=\mu$ and $\var(X_n)=\sigma^2<\infty$. If $S_n=X_1+\cdots+X_n$, then
\begin{align*}
\frac{S_n-n\mu}{\sigma\sqrt{n}}\stackrel{d}{\longrightarrow}N(0,1).
\end{align*}
\end{theorem}

\begin{proof}
The key observation is that if the $X_i$'s are normal, then $S_n$ is normal, and in fact
\begin{align*}
\frac{S_n-n\mu}{\sigma \sqrt{n}}\sim N(0,1)
\end{align*}
exactly, for every $n$. This comes down to the addition rule for independent normal variables: if $X\sim N(\mu,\sigma^2)$, $Y\sim N(\nu,\tau^2)$, and $X$ and $Y$ are independent, then $X+Y\sim N(\mu+\nu,\sigma^2+\tau^2)$.

We introduce some notation: $X\sim N(\mu,\sigma^2)$ means
\begin{align*}
P(X\leq x)=\int_{-\infty}^x\frac{1}{\sqrt{2\pi}\sigma}e^{-\frac{1}{2}\frac{(y-\mu)^2}{\sigma^2}}dy\mbox{.}
\end{align*}
Define $\Phi(z)=\int_{-\infty}^z\frac{1}{\sqrt{2\pi}}e^{-\frac{1}{2}y^2}dy$ and write $\phi=\Phi'$ for the standard normal density. Then
\begin{eqnarray*}
Z\sim N(0,1)&\Longleftrightarrow& P(Z\leq z)=\Phi(z),\\
X\sim N(\mu,\sigma^2)&\Longleftrightarrow&\frac{X-\mu}{\sigma}\sim N(0,1)\\
&\Longleftrightarrow& P(X\leq x)=\Phi\left(\frac{x-\mu}{\sigma}\right)\\
&\Longleftrightarrow& P(X\in dx)=\frac{1}{\sigma}\phi\left(\frac{x-\mu}{\sigma}\right)dx.
\end{eqnarray*}
There are several ways to prove the addition rule:
\begin{enumerate}
\item use a transform (m.g.f.\ or c.f.);
\item hack it out by the convolution formula, completing the square inside the exponential (a worked computation follows this proof):
\begin{align*}
f_{X+Y}(z)=\int_{-\infty}^\infty f_X(x)f_Y(z-x)dx;
\end{align*}
or
\item use a geometric argument. Look at the case $X\sim N(0,1)$, $Y\sim N(0,1)$ independent:
\begin{align*}
P(X\in dx, Y\in dy)&=f_X(x)dxf_Y(y)dy\\
&=\frac{1}{\sqrt{2\pi}}e^{-\frac{1}{2}x^2}\frac{1}{\sqrt{2\pi}}e^{-\frac{1}{2}y^2}dxdy\\
&=\frac{1}{2\pi}e^{-\frac{1}{2}(x^2+y^2)}dxdy,
\end{align*}
which is invariant under rotations of the $(x,y)$ plane. Hence $X\cos\theta+Y\sin\theta\sim N(0,1)$ exactly, for every $\theta$. For $\sigma^2+\tau^2=1$, take $\cos\theta=\sigma$ and $\sin\theta=\tau$: then $\sigma X\sim N(0,\sigma^2)$, $\tau Y\sim N(0,\tau^2)$, and $\sigma X+\tau Y=X\cos\theta+Y\sin\theta\sim N(0,1)$, which is the addition rule in this case.
\end{enumerate}
\end{proof}
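To illustrate method 2 above, here is the completing-the-square computation in the simplest case: $X$ and $Y$ independent $N(0,1)$, where the addition rule asserts $X+Y\sim N(0,2)$. Using the identity $-\frac{1}{2}x^2-\frac{1}{2}(z-x)^2=-\left(x-\frac{z}{2}\right)^2-\frac{z^2}{4}$ and the Gaussian integral $\int_{-\infty}^\infty e^{-u^2}du=\sqrt{\pi}$,
\begin{align*}
f_{X+Y}(z)&=\int_{-\infty}^\infty \frac{1}{\sqrt{2\pi}}e^{-\frac{1}{2}x^2}\frac{1}{\sqrt{2\pi}}e^{-\frac{1}{2}(z-x)^2}dx\\
&=\frac{1}{2\pi}e^{-\frac{z^2}{4}}\int_{-\infty}^\infty e^{-\left(x-\frac{z}{2}\right)^2}dx\\
&=\frac{1}{2\pi}e^{-\frac{z^2}{4}}\sqrt{\pi}=\frac{1}{\sqrt{2\pi}\sqrt{2}}e^{-\frac{1}{2}\frac{z^2}{2}},
\end{align*}
which is the $N(0,2)$ density, as the addition rule predicts. After centering, the general case is the same computation with more bookkeeping.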
\bibliographystyle{plain}
\bibliography{../books}

\end{document}